package check

import (
	"fmt"
	"github.com/gansidui/bktree"
)

// OSCLink is one link entry together with the duplicate counter that Check
// maintains for it.
type OSCLink struct {
	Link    string // title link
	Content string // content text; Check compares this field between entries
	Count   int    // duplicate counter; Check increments it once for every other entry found similar
}

// Thresholds used by Check and Report.
const (
	SimVar   float32 = 0.9 // similarity threshold: Check counts a pair as duplicates when its score is >= SimVar
	CountVar int     = 2   // how many mutually duplicated entries are needed to record one; Report prints entries whose Count >= CountVar-1
)

// Check compares the Content of every unordered pair of entries in links.
// Whenever GetSimilarity for a pair reaches SimVar, the Count of both entries
// is incremented in place, so the caller's slice is modified. After all pairs
// have been examined the slice is passed to Report.
func Check(links []OSCLink) {
	total := len(links)
	for first := range links {
		// Start at first+1 so each pair is scored once and never against itself.
		for second := first + 1; second < total; second++ {
			if score := GetSimilarity(links[first].Content, links[second].Content); score >= SimVar {
				links[first].Count++
				links[second].Count++
			}
		}
	}
	Report(links)
}

// Report prints, one per line on standard output, the Content and Count of
// every entry whose Count is at least CountVar-1. Entries below that
// threshold are skipped.
func Report(links []OSCLink) {
	threshold := CountVar - 1
	for idx := range links {
		entry := links[idx]
		if entry.Count < threshold {
			continue
		}
		fmt.Printf("%s: %d\n", entry.Content, entry.Count)
	}
}

// GetSimilarity returns a similarity score for str1 and str2, computed as
//
//	1 - Levenshtein(str1, str2) / max(len(str1), len(str2))
//
// so identical strings score 1 and the score drops as the edit distance
// grows. Two empty strings score 1. An empty string compared with a
// non-empty one scores 0; previously that case returned 1 - len(other),
// a non-positive value that depended on the other string's length.
//
// NOTE(review): the denominator is a byte length (len on a string). If
// bktree.Levenshtein counts runes rather than bytes, multi-byte text is
// divided by too large a length and scores higher than it should — confirm
// against the bktree implementation.
func GetSimilarity(str1 string, str2 string) float32 {
	len1, len2 := len(str1), len(str2)
	switch {
	case len1 == 0 && len2 == 0:
		// Nothing differs; this also keeps the division below away from 0/0.
		return 1
	case len1 == 0 || len2 == 0:
		// Exactly one side is empty: the strings share nothing.
		return 0
	}

	longest := len1
	if len2 > longest {
		longest = len2
	}

	dist := bktree.Levenshtein(str1, str2)
	return 1 - float32(dist)/float32(longest)
}
